Descriptive Statistics of NYC Shooting Data

library(ggplot2)
library(readxl)
library(readr)
library(tidyverse)
library(dplyr)
library(sf)
library(plotly)
library(geojsonio)
library(knitr)
library(tidyr)
# Load the dataset once. The same table is used under two names further down
# (`df_descriptive` for the summary tables, `data_final` for the map sections),
# so the second name is bound to the already-parsed data instead of reading
# the CSV a second time.
df_descriptive <- read_csv("data_final.csv")
data_final <- df_descriptive

EDA sorted by neighborhood

Table showing the top 10 NTAs by total incidents in each borough

# Top 10 NTAs by total shooting incidents within each borough, laid out wide:
# one column per borough, one row per rank (`row_num`).
incident_by_nta_borough <- df_descriptive %>%
  # NOTE(review): drop_na() without column arguments removes a row when ANY
  # column is missing - confirm this is intended rather than
  # drop_na(BORO, NTA).
  drop_na() %>%
  # One row per (BORO, NTA) with the number of incident records.
  count(BORO, NTA, name = "total_incidents") %>%
  group_by(BORO) %>%
  # with_ties = FALSE caps every borough at exactly 10 rows, so no global
  # slice() is needed afterwards to trim rows created by ties at rank 10.
  slice_max(order_by = total_incidents, n = 10, with_ties = FALSE) %>%
  mutate(row_num = row_number()) %>%
  ungroup() %>%
  select(row_num, BORO, NTA) %>%
  # Each (row_num, BORO) pair is unique, so the widened columns are plain
  # character vectors; no unnest() over hard-coded borough names is required.
  pivot_wider(names_from = BORO, values_from = NTA) %>%
  arrange(row_num)

# Display the table in the desired format
kable(incident_by_nta_borough, caption = "Top 10 NTAs with Total Shooting Incidents in Each Borough (2017-2023)")
Top 10 NTAs with Total Shooting Incidents in Each Borough (2017-2023)
row_num BRONX BROOKLYN MANHATTAN QUEENS STATEN ISLAND
1 Mott Haven-Port Morris Brownsville East Harlem (North) Far Rockaway-Bayswater St. George-New Brighton
2 Mount Hope Bedford-Stuyvesant (East) Harlem (North) Baisley Park Tompkinsville-Stapleton-Clifton-Fox Hills
3 Tremont Crown Heights (North) East Harlem (South) Richmond Hill West New Brighton-Silver Lake-Grymes Hill
4 Mount Eden-Claremont (West) East New York-New Lots Harlem (South) Jamaica Mariner’s Harbor-Arlington-Graniteville
5 Concourse-Concourse Village East Flatbush-Remsen Village Washington Heights (South) Rockaway Beach-Arverne-Edgemere Annadale-Huguenot-Prince’s Bay-Woodrow
6 Williamsbridge-Olinville East New York (North) Chelsea-Hudson Yards St. Albans Port Richmond
7 Melrose Bedford-Stuyvesant (West) Hamilton Heights-Sugar Hill South Ozone Park Rosebank-Shore Acres-Park Hill
8 Belmont Canarsie Inwood Elmhurst Grasmere-Arrochar-South Beach-Dongan Hills
9 Longwood Coney Island-Sea Gate Chinatown-Two Bridges South Jamaica Great Kills-Eltingville
10 Fordham Heights East New York-City Line Washington Heights (North) Astoria (East)-Woodside (North) New Dorp-Midland Beach

Table showing Top 10 NTAs with the Highest Incident Rate

# Ten highest values of `incident_rate_by_year_nta` among residential NTAs,
# citywide. The data has one row per incident, so rows are de-duplicated
# before ranking.
#
# The de-duplication key is (NTA, rate) and it runs AFTER the NTAType filter.
# Keying on the rate value alone, before filtering, would silently drop a
# residential NTA whose rate happens to equal that of a row seen earlier in
# the file.
# NOTE(review): if a year column is available, distinct(NTA, <year>) would be
# the exact key; presumably there is one rate per NTA per year - confirm.
top_10_incident_rate <- df_descriptive %>%
  filter(NTAType == "Residential") %>%
  distinct(NTA, incident_rate_by_year_nta, .keep_all = TRUE) %>%
  arrange(desc(incident_rate_by_year_nta)) %>%
  slice_head(n = 10)

# Keep only the columns shown in the report, under display-friendly names.
top_10_nta_incident_rate <- top_10_incident_rate %>%
  select(
    NTA,
    BORO,
    incident_rate = incident_rate_by_year_nta,
    total_population = Total_population_nta,
    percent_poverty = Percent_poverty,
    percent_education = Percent_education
  )

# Display the table in the desired format
# NOTE(review): this table is not split by borough although the caption says
# "in Each Borough" - consider rewording the caption.
kable(top_10_nta_incident_rate, digits = 6, caption = "Top 10 NTAs with the Highest Incident Rate by Year in Each Borough")
Top 10 NTAs with the Highest Incident Rate by Year in Each Borough
NTA BORO incident_rate total_population percent_poverty percent_education
Claremont Village-Claremont (East) BRONX 0.187350 24553 43.2 61.7
Brownsville BROOKLYN 0.153795 60470 37.2 76.9
Tremont BRONX 0.118196 32150 36.3 71.4
Inwood MANHATTAN 0.115706 36299 15.3 77.0
East New York-New Lots BROOKLYN 0.107539 53004 25.3 82.6
Hunts Point BRONX 0.105743 15131 34.3 72.2
Mount Eden-Claremont (West) BRONX 0.100703 49651 31.8 67.9
Brownsville BROOKLYN 0.099223 60470 37.2 76.9
Brownsville BROOKLYN 0.097569 60470 37.2 76.9
Tremont BRONX 0.096423 32150 36.3 71.4

Plot Total Number of Incidents Across NYC BOROs (2017-2023)

Process of data manipulation

Plot of Total Number of Incidents Across NYC BOROs (2017-2023)

# Re-read the incident data for the map sections.
data_final <- read_csv("data_final.csv")

# `boro_map_data` is created outside this chunk; here it only gains the
# HTML label that is shown on hover and on click.
boro_map_data <- mutate(
  boro_map_data,
  hover_text = paste("Borough:", boro_name, "<br>Total Incidents:", Number_of_Incidents)
)

# Interactive borough map: one trace per borough, coloured by incident count,
# with a click handler that pops up the hover text of the clicked point.
plot <- boro_map_data %>%
  plot_ly(
    type = "scattermapbox",
    split = ~boro_name,            # one trace per borough
    color = ~Number_of_Incidents,  # colour encodes the incident count
    colors = "viridis",
    text = ~hover_text,
    hoverinfo = "text",
    marker = list(size = 8, opacity = 0.7)
  ) %>%
  layout(
    title = "Total Number of Incidents Across NYC BOROs (2017-2023)",
    mapbox = list(
      style = "carto-positron",
      center = list(lon = -74.0060, lat = 40.7128),
      zoom = 10
    )
  ) %>%
  event_register("plotly_click") %>%
  htmlwidgets::onRender("
    function(el, x) {
      el.on('plotly_click', function(d) {
        var point = d.points[0];
        var text = point.text;
        alert('You clicked on: ' + text);
      });
    }
  ")

# Display the interactive plot
plot

The map shows the total number of incidents across NYC boroughs (2017–2023), with each borough represented by a distinct color. A gradient is used to indicate the magnitude of incidents, with darker shades corresponding to higher counts, ranging from 0 to over 3,000 incidents. Each borough is outlined and filled with its respective color, making it easy to distinguish. The legend on the right identifies the boroughs (Bronx, Brooklyn, Manhattan, Queens, Staten Island) and aligns with the color gradient to show the number of incidents. This visualization highlights geographical disparities in incident frequency across the boroughs, aiding in understanding spatial distribution patterns.

Plot Total Number of Incidents Across NYC CDTAs from 2017 to 2023

Removing NA

# CDTA codes contain a space between the letters and the number; strip it
# from BOTH tables up front so the counts and the join use the same keys.
data_final$CDTA <- gsub(" ", "", data_final$CDTA, fixed = TRUE)
cdta_shape$CDTA2020 <- gsub(" ", "", cdta_shape$CDTA2020, fixed = TRUE)

# Number of incident records per CDTA.
cdta_incident_counts <- data_final %>%
  count(CDTA, name = "Number_of_Incidents")

# CDTAs present in the shapefile but absent from the incident data.
unmatched_cdta <- setdiff(cdta_shape$CDTA2020, data_final$CDTA)

# Attach the counts to the CDTA geometries and bin them for the choropleth.
cdta_map_data <- cdta_shape %>%
  left_join(cdta_incident_counts, by = c("CDTA2020" = "CDTA")) %>%
  mutate(
    # CDTAs without a match have no recorded incidents; coalesce() keeps the
    # count column integer instead of coercing it to double.
    Number_of_Incidents = coalesce(Number_of_Incidents, 0L),
    # The bin labels must stay in sync with the fill scale of the CDTA map.
    # NOTE: cut() returns NA for any count above the last break (600).
    Incident_Range = cut(
      Number_of_Incidents,
      breaks = seq(0, 600, by = 120),
      labels = c("0-120", "121-240", "241-360", "361-480", "481-600"),
      include.lowest = TRUE
    )
  )

The CDTAs “QN80”, “SI95”, and “QN84” have no match in the incident data, i.e. no shooting incidents were recorded in them, so their missing (NA) counts were set to 0.

Plot of Total Number of Incidents Across NYC CDTAs from 2017 to 2023

# Citywide CDTA choropleth: polygons are filled by incident bin and each
# district is labelled with its raw incident count.
ggplot(cdta_map_data) +
  geom_sf(aes(fill = Incident_Range), color = "white", size = 0.2) +
  geom_sf_text(aes(label = Number_of_Incidents), size = 3, color = "black") +
  scale_fill_manual(
    name = "Number of Incidents",
    values = c(
      "0-120" = "#b2e2e2",
      "121-240" = "skyblue",
      "241-360" = "#66c2a4",
      "361-480" = "#2ca25f",
      "481-600" = "#006d2c"
    )
  ) +
  labs(
    title = "Total Number of Incidents Across NYC CDTAs from 2017 to 2023",
    subtitle = "Incidents grouped by range (0-600, 120 breaks)",
    caption = "Data Source: Your dataset"
  ) +
  theme_minimal() +
  # Hide axis text, ticks, titles and grid lines.
  theme(
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank()
  )

The map shows NYC CDTA incidents (2017–2023) using a gradient from light blue (fewer incidents) to dark green (more incidents), with counts labeled on each district. Dark green areas highlight hotspots, likely in densely populated regions, while lighter blue areas, like Staten Island, show fewer incidents. This visualization helps identify trends and prioritize safety efforts.

for shiny

Top 10 NTAs with the Highest Total Shooting Incidents in each boro

MANHATTAN

# MANHATTAN: the ten NTAs with the most incident records, together with the
# NTA-level population, poverty and education figures (taken from the first
# row of each NTA).
top_10_nta_overall_manhattan <- df_descriptive %>%
  filter(BORO == "MANHATTAN") %>%
  group_by(BORO, NTA) %>%
  summarise(
    total_incidents = n(),
    total_population = first(Total_population_nta),
    percent_poverty = first(Percent_poverty),
    percent_education = first(Percent_education),
    .groups = "drop"
  ) %>%
  arrange(desc(total_incidents)) %>%
  slice_head(n = 10)

# Render the table.
kable(top_10_nta_overall_manhattan, digits = 2, caption = "Top 10 NTAs with the Highest Total Shooting Incidents in MANHATTAN")
Top 10 NTAs with the Highest Total Shooting Incidents in MANHATTAN
BORO NTA total_incidents total_population percent_poverty percent_education
MANHATTAN Harlem (North) 253 83327 25.1 82.9
MANHATTAN East Harlem (North) 229 64655 32.3 75.9
MANHATTAN East Harlem (South) 145 59814 29.0 78.3
MANHATTAN Harlem (South) 130 47113 18.8 87.4
MANHATTAN Inwood 105 36299 15.3 77.0
MANHATTAN Washington Heights (South) 88 72037 19.9 70.3
MANHATTAN Washington Heights (North) 67 71842 14.8 79.3
MANHATTAN Hamilton Heights-Sugar Hill 54 49410 24.0 78.6
MANHATTAN Manhattanville-West Harlem 46 22183 26.7 77.4
MANHATTAN East Village 41 71436 24.3 87.7

BROOKLYN

# BROOKLYN: the ten NTAs with the most incident records, together with the
# NTA-level population, poverty and education figures (taken from the first
# row of each NTA).
top_10_nta_overall_brooklyn <- df_descriptive %>%
  filter(BORO == "BROOKLYN") %>%
  group_by(BORO, NTA) %>%
  summarise(
    total_incidents = n(),
    total_population = first(Total_population_nta),
    percent_poverty = first(Percent_poverty),
    percent_education = first(Percent_education),
    .groups = "drop"
  ) %>%
  arrange(desc(total_incidents)) %>%
  slice_head(n = 10)

# Render the table.
kable(top_10_nta_overall_brooklyn, digits = 2, caption = "Top 10 NTAs with the Highest Total Shooting Incidents in BROOKLYN")
Top 10 NTAs with the Highest Total Shooting Incidents in BROOKLYN
BORO NTA total_incidents total_population percent_poverty percent_education
BROOKLYN Brownsville 350 60470 37.2 76.9
BROOKLYN Bedford-Stuyvesant (East) 323 84653 22.2 87.8
BROOKLYN Crown Heights (North) 235 85275 20.5 84.1
BROOKLYN Bedford-Stuyvesant (West) 207 89189 26.0 84.8
BROOKLYN East New York-New Lots 205 53004 25.3 82.6
BROOKLYN Canarsie 166 89932 13.0 88.1
BROOKLYN East New York (North) 148 42818 25.5 79.4
BROOKLYN East Flatbush-Remsen Village 126 39115 18.7 83.7
BROOKLYN Ocean Hill 120 37952 27.3 83.1
BROOKLYN Flatbush 111 66503 13.7 87.8

BRONX

# BRONX: the ten NTAs with the most incident records, together with the
# NTA-level population, poverty and education figures (taken from the first
# row of each NTA).
top_10_nta_overall_bronx <- df_descriptive %>%
  filter(BORO == "BRONX") %>%
  group_by(BORO, NTA) %>%
  summarise(
    total_incidents = n(),
    total_population = first(Total_population_nta),
    percent_poverty = first(Percent_poverty),
    percent_education = first(Percent_education),
    .groups = "drop"
  ) %>%
  arrange(desc(total_incidents)) %>%
  slice_head(n = 10)

# Render the table.
kable(top_10_nta_overall_bronx, digits = 2, caption = "Top 10 NTAs with the Highest Total Shooting Incidents in BRONX")
Top 10 NTAs with the Highest Total Shooting Incidents in BRONX
BORO NTA total_incidents total_population percent_poverty percent_education
BRONX Mott Haven-Port Morris 207 57718 43.7 62.0
BRONX Concourse-Concourse Village 169 69387 31.9 70.5
BRONX Mount Eden-Claremont (West) 166 49651 31.8 67.9
BRONX Williamsbridge-Olinville 158 61346 21.0 76.6
BRONX Melrose 152 42651 40.7 68.2
BRONX Mount Hope 149 49099 30.2 66.8
BRONX Tremont 138 32150 36.3 71.4
BRONX Longwood 130 40289 32.0 66.9
BRONX Belmont 116 35825 39.6 65.6
BRONX Fordham Heights 112 32099 33.2 66.0

QUEENS

# QUEENS: the ten NTAs with the most incident records, together with the
# NTA-level population, poverty and education figures (taken from the first
# row of each NTA).
top_10_nta_overall_queens <- df_descriptive %>%
  filter(BORO == "QUEENS") %>%
  group_by(BORO, NTA) %>%
  summarise(
    total_incidents = n(),
    total_population = first(Total_population_nta),
    percent_poverty = first(Percent_poverty),
    percent_education = first(Percent_education),
    .groups = "drop"
  ) %>%
  arrange(desc(total_incidents)) %>%
  slice_head(n = 10)

# Render the table.
kable(top_10_nta_overall_queens, digits = 2, caption = "Top 10 NTAs with the Highest Total Shooting Incidents in QUEENS")
Top 10 NTAs with the Highest Total Shooting Incidents in QUEENS
BORO NTA total_incidents total_population percent_poverty percent_education
QUEENS Far Rockaway-Bayswater 116 58648 18.3 79.9
QUEENS Rockaway Beach-Arverne-Edgemere 100 41367 22.1 82.1
QUEENS St. Albans 93 51816 8.6 88.8
QUEENS South Jamaica 91 44401 13.4 78.9
QUEENS Baisley Park 83 43090 11.4 83.5
QUEENS Jamaica 82 60993 15.7 70.4
QUEENS Queensbridge-Ravenswood-Dutch Kills 79 32954 16.5 85.0
QUEENS South Ozone Park 66 79540 9.7 76.7
QUEENS Richmond Hill 55 34100 10.6 79.8
QUEENS Laurelton 45 26088 5.4 91.2

STATEN ISLAND

# STATEN ISLAND: the ten NTAs with the most incident records, together with
# the NTA-level population, poverty and education figures (taken from the
# first row of each NTA).
top_10_nta_overall_staten <- df_descriptive %>%
  filter(BORO == "STATEN ISLAND") %>%
  group_by(BORO, NTA) %>%
  summarise(
    total_incidents = n(),
    total_population = first(Total_population_nta),
    percent_poverty = first(Percent_poverty),
    percent_education = first(Percent_education),
    .groups = "drop"
  ) %>%
  arrange(desc(total_incidents)) %>%
  slice_head(n = 10)

# Render the table.
kable(top_10_nta_overall_staten, digits = 2, caption = "Top 10 NTAs with the Highest Total Shooting Incidents in STATEN ISLAND")
Top 10 NTAs with the Highest Total Shooting Incidents in STATEN ISLAND
BORO NTA total_incidents total_population percent_poverty percent_education
STATEN ISLAND St. George-New Brighton 48 20549 18.5 79.9
STATEN ISLAND Mariner’s Harbor-Arlington-Graniteville 46 33492 16.8 83.3
STATEN ISLAND Tompkinsville-Stapleton-Clifton-Fox Hills 42 19027 25.5 79.2
STATEN ISLAND Rosebank-Shore Acres-Park Hill 34 25510 14.4 81.7
STATEN ISLAND West New Brighton-Silver Lake-Grymes Hill 26 37010 9.9 91.2
STATEN ISLAND Port Richmond 23 22609 22.0 83.2
STATEN ISLAND Grasmere-Arrochar-South Beach-Dongan Hills 8 36259 11.3 85.4
STATEN ISLAND Annadale-Huguenot-Prince’s Bay-Woodrow 6 40534 6.0 92.6
STATEN ISLAND New Dorp-Midland Beach 5 29083 8.5 84.6
STATEN ISLAND Arden Heights-Rossville 4 30683 4.3 92.3

Top 10 NTAs with the Highest Incident Rate in each boro

MANHATTAN

# Ten highest values of `incident_rate_by_year_nta` among residential NTAs in
# MANHATTAN. The data has one row per incident, so rows are de-duplicated
# before ranking.
#
# Filtering happens BEFORE de-duplication and the key is (NTA, rate):
# de-duplicating the whole city on the rate value alone would drop a
# Manhattan row whenever an NTA elsewhere has the same rate and appears
# earlier in the file.
# NOTE(review): if a year column is available, distinct(NTA, <year>) would be
# the exact key; presumably there is one rate per NTA per year - confirm.
top_10_incident_rate_manhattan <- df_descriptive %>%
  filter(BORO == "MANHATTAN", NTAType == "Residential") %>%
  distinct(NTA, incident_rate_by_year_nta, .keep_all = TRUE) %>%
  arrange(desc(incident_rate_by_year_nta)) %>%
  slice_head(n = 10)

# Keep only the columns shown in the report, under display-friendly names.
top_10_nta_incident_rate_manhattan <- top_10_incident_rate_manhattan %>%
  select(
    NTA,
    BORO,
    incident_rate = incident_rate_by_year_nta,
    total_population = Total_population_nta,
    percent_poverty = Percent_poverty,
    percent_education = Percent_education
  )

# Display the table in the desired format
kable(top_10_nta_incident_rate_manhattan, digits = 6, caption = "Top 10 NTAs with the Highest Incident Rate by Year in MANHATTAN")
Top 10 NTAs with the Highest Incident Rate by Year in MANHATTAN
NTA BORO incident_rate total_population percent_poverty percent_education
Inwood MANHATTAN 0.115706 36299 15.3 77.0
East Harlem (North) MANHATTAN 0.086614 64655 32.3 75.9
Harlem (North) MANHATTAN 0.080406 83327 25.1 82.9
Inwood MANHATTAN 0.068872 36299 15.3 77.0
Harlem (South) MANHATTAN 0.065799 47113 18.8 87.4
Harlem (North) MANHATTAN 0.064805 83327 25.1 82.9
East Harlem (North) MANHATTAN 0.061867 64655 32.3 75.9
Harlem (South) MANHATTAN 0.057309 47113 18.8 87.4
Harlem (North) MANHATTAN 0.055204 83327 25.1 82.9
East Harlem (South) MANHATTAN 0.055171 59814 29.0 78.3

BROOKLYN

# Ten highest values of `incident_rate_by_year_nta` among residential NTAs in
# BROOKLYN. The data has one row per incident, so rows are de-duplicated
# before ranking.
#
# Filtering happens BEFORE de-duplication and the key is (NTA, rate):
# de-duplicating the whole city on the rate value alone would drop a
# Brooklyn row whenever an NTA elsewhere has the same rate and appears
# earlier in the file.
# NOTE(review): if a year column is available, distinct(NTA, <year>) would be
# the exact key; presumably there is one rate per NTA per year - confirm.
top_10_incident_rate_brooklyn <- df_descriptive %>%
  filter(BORO == "BROOKLYN", NTAType == "Residential") %>%
  distinct(NTA, incident_rate_by_year_nta, .keep_all = TRUE) %>%
  arrange(desc(incident_rate_by_year_nta)) %>%
  slice_head(n = 10)

# Keep only the columns shown in the report, under display-friendly names.
top_10_nta_incident_rate_brooklyn <- top_10_incident_rate_brooklyn %>%
  select(
    NTA,
    BORO,
    incident_rate = incident_rate_by_year_nta,
    total_population = Total_population_nta,
    percent_poverty = Percent_poverty,
    percent_education = Percent_education
  )

# Display the table in the desired format
kable(top_10_nta_incident_rate_brooklyn, digits = 6, caption = "Top 10 NTAs with the Highest Incident Rate by Year in BROOKLYN")
Top 10 NTAs with the Highest Incident Rate by Year in BROOKLYN
NTA BORO incident_rate total_population percent_poverty percent_education
Brownsville BROOKLYN 0.153795 60470 37.2 76.9
East New York-New Lots BROOKLYN 0.107539 53004 25.3 82.6
Brownsville BROOKLYN 0.099223 60470 37.2 76.9
Brownsville BROOKLYN 0.097569 60470 37.2 76.9
Bedford-Stuyvesant (East) BROOKLYN 0.088597 84653 22.2 87.8
East New York (North) BROOKLYN 0.079406 42818 25.5 79.4
East New York-New Lots BROOKLYN 0.079239 53004 25.3 82.6
East Flatbush-Remsen Village BROOKLYN 0.071584 39115 18.7 83.7
Ocean Hill BROOKLYN 0.071142 37952 27.3 83.1
Brownsville BROOKLYN 0.069456 60470 37.2 76.9

BRONX

# Ten highest values of `incident_rate_by_year_nta` among residential NTAs in
# the BRONX. The data has one row per incident, so rows are de-duplicated
# before ranking.
#
# Filtering happens BEFORE de-duplication and the key is (NTA, rate):
# de-duplicating the whole city on the rate value alone would drop a
# Bronx row whenever an NTA elsewhere has the same rate and appears
# earlier in the file.
# NOTE(review): if a year column is available, distinct(NTA, <year>) would be
# the exact key; presumably there is one rate per NTA per year - confirm.
top_10_incident_rate_bronx <- df_descriptive %>%
  filter(BORO == "BRONX", NTAType == "Residential") %>%
  distinct(NTA, incident_rate_by_year_nta, .keep_all = TRUE) %>%
  arrange(desc(incident_rate_by_year_nta)) %>%
  slice_head(n = 10)

# Keep only the columns shown in the report, under display-friendly names.
top_10_nta_incident_rate_bronx <- top_10_incident_rate_bronx %>%
  select(
    NTA,
    BORO,
    incident_rate = incident_rate_by_year_nta,
    total_population = Total_population_nta,
    percent_poverty = Percent_poverty,
    percent_education = Percent_education
  )

# Display the table in the desired format
kable(top_10_nta_incident_rate_bronx, digits = 6, caption = "Top 10 NTAs with the Highest Incident Rate by Year in BRONX")
Top 10 NTAs with the Highest Incident Rate by Year in BRONX
NTA BORO incident_rate total_population percent_poverty percent_education
Claremont Village-Claremont (East) BRONX 0.187350 24553 43.2 61.7
Tremont BRONX 0.118196 32150 36.3 71.4
Hunts Point BRONX 0.105743 15131 34.3 72.2
Mount Eden-Claremont (West) BRONX 0.100703 49651 31.8 67.9
Tremont BRONX 0.096423 32150 36.3 71.4
West Farms BRONX 0.094307 20147 44.8 61.1
Longwood BRONX 0.091836 40289 32.0 66.9
Mount Hope BRONX 0.089615 49099 30.2 66.8
West Farms BRONX 0.084380 20147 44.8 61.1
Morrisania BRONX 0.082431 37607 35.7 65.9

QUEENS

# Ten highest values of `incident_rate_by_year_nta` among residential NTAs in
# QUEENS. The data has one row per incident, so rows are de-duplicated
# before ranking.
#
# Filtering happens BEFORE de-duplication and the key is (NTA, rate):
# de-duplicating the whole city on the rate value alone would drop a
# Queens row whenever an NTA elsewhere has the same rate and appears
# earlier in the file.
# NOTE(review): if a year column is available, distinct(NTA, <year>) would be
# the exact key; presumably there is one rate per NTA per year - confirm.
top_10_incident_rate_queens <- df_descriptive %>%
  filter(BORO == "QUEENS", NTAType == "Residential") %>%
  distinct(NTA, incident_rate_by_year_nta, .keep_all = TRUE) %>%
  arrange(desc(incident_rate_by_year_nta)) %>%
  slice_head(n = 10)

# Keep only the columns shown in the report, under display-friendly names.
top_10_nta_incident_rate_queens <- top_10_incident_rate_queens %>%
  select(
    NTA,
    BORO,
    incident_rate = incident_rate_by_year_nta,
    total_population = Total_population_nta,
    percent_poverty = Percent_poverty,
    percent_education = Percent_education
  )

# Display the table in the desired format
kable(top_10_nta_incident_rate_queens, digits = 6, caption = "Top 10 NTAs with the Highest Incident Rate by Year in QUEENS")
Top 10 NTAs with the Highest Incident Rate by Year in QUEENS
NTA BORO incident_rate total_population percent_poverty percent_education
South Jamaica QUEENS 0.069818 44401 13.4 78.9
Rockaway Beach-Arverne-Edgemere QUEENS 0.067687 41367 22.1 82.1
Far Rockaway-Bayswater QUEENS 0.064793 58648 18.3 79.9
Queensbridge-Ravenswood-Dutch Kills QUEENS 0.054622 32954 16.5 85.0
Laurelton QUEENS 0.053665 26088 5.4 91.2
Rockaway Beach-Arverne-Edgemere QUEENS 0.050765 41367 22.1 82.1
Richmond Hill QUEENS 0.046921 34100 10.6 79.8
Queensbridge-Ravenswood-Dutch Kills QUEENS 0.045518 32954 16.5 85.0
St. Albans QUEENS 0.044388 51816 8.6 88.8
Richmond Hill QUEENS 0.043988 34100 10.6 79.8

STATEN ISLAND

# Ten highest values of `incident_rate_by_year_nta` among residential NTAs in
# STATEN ISLAND. The data has one row per incident, so rows are de-duplicated
# before ranking.
#
# Filtering happens BEFORE de-duplication and the key is (NTA, rate):
# de-duplicating the whole city on the rate value alone would drop a
# Staten Island row whenever an NTA elsewhere has the same rate and appears
# earlier in the file.
# NOTE(review): if a year column is available, distinct(NTA, <year>) would be
# the exact key; presumably there is one rate per NTA per year - confirm.
top_10_incident_rate_staten <- df_descriptive %>%
  filter(BORO == "STATEN ISLAND", NTAType == "Residential") %>%
  distinct(NTA, incident_rate_by_year_nta, .keep_all = TRUE) %>%
  arrange(desc(incident_rate_by_year_nta)) %>%
  slice_head(n = 10)

# Keep only the columns shown in the report, under display-friendly names.
top_10_nta_incident_rate_staten <- top_10_incident_rate_staten %>%
  select(
    NTA,
    BORO,
    incident_rate = incident_rate_by_year_nta,
    total_population = Total_population_nta,
    percent_poverty = Percent_poverty,
    percent_education = Percent_education
  )

# Display the table in the desired format
kable(top_10_nta_incident_rate_staten, digits = 6, caption = "Top 10 NTAs with the Highest Incident Rate by Year in STATEN ISLAND")
Top 10 NTAs with the Highest Incident Rate by Year in STATEN ISLAND
NTA BORO incident_rate total_population percent_poverty percent_education
St. George-New Brighton STATEN ISLAND 0.058397 20549 18.5 79.9
Tompkinsville-Stapleton-Clifton-Fox Hills STATEN ISLAND 0.052557 19027 25.5 79.2
Mariner’s Harbor-Arlington-Graniteville STATEN ISLAND 0.047773 33492 16.8 83.3
Port Richmond STATEN ISLAND 0.044230 22609 22.0 83.2
St. George-New Brighton STATEN ISLAND 0.043798 20549 18.5 79.9
St. George-New Brighton STATEN ISLAND 0.038931 20549 18.5 79.9
Mariner’s Harbor-Arlington-Graniteville STATEN ISLAND 0.038815 33492 16.8 83.3
Tompkinsville-Stapleton-Clifton-Fox Hills STATEN ISLAND 0.036790 19027 25.5 79.2
Rosebank-Shore Acres-Park Hill STATEN ISLAND 0.035280 25510 14.4 81.7
St. George-New Brighton STATEN ISLAND 0.034065 20549 18.5 79.9

Number of Incidents counted by CDTAs in each Boroughs from 2017 to 2023

# One CDTA choropleth per borough: polygons are filled on a continuous
# green -> yellow -> red scale by incident count and labelled with that count.
boroughs <- unique(cdta_map_data$BoroName)
for (b in boroughs) {
  # Restrict the CDTA geometries to the current borough.
  borough_data <- filter(cdta_map_data, BoroName == b)

  plot <- ggplot(borough_data) +
    geom_sf(aes(fill = Number_of_Incidents), color = "black") +
    geom_sf_text(aes(label = Number_of_Incidents), size = 3, color = "black") +
    scale_fill_gradientn(
      name = "Number of Incidents",
      colors = c("green", "yellow", "red")
    ) +
    labs(
      title = paste("CDTA Incidents in", b),
      subtitle = "2017 to 2023",
      x = "Longitude",
      y = "Latitude"
    ) +
    theme_minimal()

  # ggplot objects are not auto-printed inside a for loop, hence print().
  print(plot)
}